
********************construction of h index*********************
* Build names.dta: one row per professor with name and field codes,
* used below as the lookup side of the merge with the citation records.
use people.dta, clear

* discard the records flagged in the variable "missing"
drop if missing == 1

keep id_prof cognome nome ssd submacro

* egen tag() marks a single observation within each distinct id_prof
egen byte first_obs = tag(id_prof)
keep if first_obs
drop first_obs

sort id_prof
save names, replace

* Load the citation records and attach each professor's details from names.dta.
use citations.dta, clear

* ssd also exists in names.dta; the copy coming from the citation file is
* kept under its own name
rename ssd ssdart
rename output2 num_cit

sort id_prof
merge m:1 id_prof using names

* People who appear only in names.dta (_merge==2), i.e. who have no ISI
* publication, are by construction assigned an h-index equal to zero.
* Everybody else starts at missing.
generate H  = cond(_merge == 2, 0, .)
generate rH = cond(_merge == 2, 0, .)
drop _merge

* start from a clean set of scalars
scalar drop _all

* age of each article relative to 2012
generate age_art = 2012 - year_pub

* step 1 - collapse citations by year of publication
* w indexes the submacro codes (one iteration per code) - x (row) is the year of publication - y (column) is the age of the article
* Rectangularise the data so that every professor has a row for every year,
* which prevents a year with no publications/citations from going unobserved
* within a submacro.
* NOTE(review): "year" is not a full variable name used elsewhere in this
* script; presumably it resolves to year_pub by abbreviation - confirm.
fillin id_prof year

* rows added by fillin have no submacro: copy the professor's own code
* (the largest one, should a professor carry more than one)
tempvar prof_submacro
egen `prof_submacro' = max(submacro), by(id_prof)
replace submacro = `prof_submacro' if submacro == .
drop `prof_submacro'

* rows added by fillin stand for "no citations"
replace num_cit = 0 if _fillin


* creation of room for additional variables: one column per article age
* (1 .. 2012-1990)
local first_year = 1990
local last_year  = 2011
local ref_year   = 2012
local max_age    = `ref_year' - `first_year'

forvalues y = 1/`max_age' {
    gen num_cit_a`y' = .
}

* summing citations over submacro to create the age profile of appearance of citations
* (total() is the current name of the egen function formerly called sum())
egen sum_cit  = total(num_cit), by(submacro year_pub)
egen mean_cit = mean(num_cit),  by(submacro year_pub)

* range of submacro codes to loop over
summarize submacro
scalar submacromax = r(max)
scalar submacromin = r(min)

* scalar() forces the scalar to be read even if a variable name (or an
* abbreviation of one) happens to match
local n = scalar(submacromax)
local m = scalar(submacromin)

forvalues w = `m'/`n' {

    * codes inside the min-max range that no observation carries have nothing
    * to update: skip them instead of paying for the passes over the data
    quietly count if submacro == `w'
    if r(N) == 0 {
        continue
    }

    * step 1 - den(x) is the sum of mean_cit over the rows of this submacro
    * published in year x or later; num(y), with y = 2012 - x, stores the same
    * number so that it can be looked up by age in step 2.
    * Missing year_pub is excluded explicitly: Stata treats missing as larger
    * than any number, so "year_pub >= x" alone would let undated rows into
    * every denominator.
    forvalues x = `first_year'/`last_year' {
        egen d`x' = total(mean_cit) if year_pub >= `x' & !missing(year_pub) & submacro == `w'
        summarize d`x' if year_pub == `x' & submacro == `w'
        scalar den`x' = r(mean)
        local y = `ref_year' - `x'
        scalar num`y' = r(mean)
        * drop the helper by name right away; a wildcard drop on the prefix d
        * would also remove any unrelated variable starting with that letter
        drop d`x'
    }

    * step 2 - building shares: sh(x,y) = num(y) / den(x)
    * step 3 - partitioning of citations: a paper published in year x gets
    *          num_cit * sh(x,y) in the column of age y, for y = 1 .. 2012 - x
    forvalues x = `first_year'/`last_year' {
        local last_age = `ref_year' - `x'
        forvalues y = 1/`last_age' {
            scalar sh`x'_`y' = scalar(num`y') / scalar(den`x')
            replace num_cit_a`y' = num_cit * scalar(sh`x'_`y') if year_pub == `x' & submacro == `w'
        }
    }
}

sort id_prof year_pub
browse id_prof cognome nome submacro year_pub num_cit num_cit_a*

* step 4 - assigning of citations to id_profs
* one column per calendar year 1990..2011
forvalues x = 1990/2011 {
    generate num_cit_y`x' = .
}

* A paper published in year x contributes its age-y column to calendar year
* z = x + y - 1, so for each x the target years run from x up to 2011.
forvalues x = 1990/2011 {
    forvalues z = `x'/2011 {
        local y = `z' - `x' + 1

        * trace of the (target year, age, publication year) triple
        display `z' " " `y' " " `x'
        replace num_cit_y`z' = num_cit_a`y' if year_pub == `x'
    }
}


**************************************************************************************
set seed 1

* small uniform noise, of the order of +/- 0.001, added to every yearly count
* so that ranks within a professor are not tied; it is drawn once per row and
* therefore depends on the current order of the observations
gen double random = (2*runiform() - 1)/1000

foreach x of numlist 1990/2011 {

    * missing yearly counts are treated as zero, then the noise is added
    replace num_cit_y`x' = cond(num_cit_y`x' == ., 0, num_cit_y`x') + random

    * field ranking within professor: rank 1 is the largest count
    egen rank`x' = rank(num_cit_y`x'), by(id_prof) field

    * h = number of rows whose count is at least as large as their rank
    generate high`x' = num_cit_y`x' >= rank`x'
    egen h`x' = sum(high`x'), by(id_prof)
    generate leadh`x' = h`x' + 1

    * Ph = largest value of count * (rank == h) within the professor
    generate tmp1`x' = rank`x' == h`x'
    generate tmp2`x' = tmp1`x' * num_cit_y`x'
    egen Ph`x' = max(tmp2`x'), by(id_prof)

    * leadPh = largest value of count * (rank == h + 1) within the professor
    generate tmp3`x' = rank`x' == leadh`x'
    generate tmp4`x' = tmp3`x' * num_cit_y`x'
    egen leadPh`x' = max(tmp4`x'), by(id_prof)

    * rh combines h, h + 1 and the two counts found above
    generate rh`x' = (leadh`x'*Ph`x' - h`x'*leadPh`x') / (1 - leadPh`x' + Ph`x')
    drop tmp*

    * store the year-x results on the rows published in year x
    replace H  = h`x'  if year_pub == `x'
    replace rH = rh`x' if year_pub == `x'
}



*** data structure by papers
* Remove the per-year working variables (years 1990-2011) by their exact
* names. Wildcards such as h* would also delete any unrelated variable that
* merely starts with the same letters.
forvalues x = 1990/2011 {
    drop rank`x' high`x' h`x' rh`x' leadh`x' Ph`x' leadPh`x'
}

save hindexpaper, replace

*** data structure by people
* one row per professor and publication year; collapse takes the mean (its
* default statistic) of H, rH and submacro within each cell
collapse H rH submacro , by(id_prof year_pub)

* re-attach the value label named submacro to the collapsed variable
label value submacro submacro

ren year_pub anno

sort id_prof

* rows without a year or without a professor id are not kept
drop if anno==.

drop if id_prof==.

save hindex, replace
